{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### TensorFlow installation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`pip3 install tensorflow`\n",
    "\n",
    "or\n",
    "\n",
    "`pip3 install tensorflow-gpu`"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### OpenAI Gym installation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "On OSX: \n",
    "\n",
    "`brew install cmake boost boost-python sdl2 swig wget`\n",
    " \n",
    "On Ubuntu 16.04:\n",
    "\n",
    "`apt-get install -y python-pyglet python3-opengl zlib1g-dev libjpeg-dev patchelf cmake swig libboost-all-dev libsdl2-dev libosmesa6-dev xvfb ffmpeg`\n",
    "\n",
    "On Ubuntu 18.04\n",
    "\n",
    "`sudo apt install -y python3-dev zlib1g-dev libjpeg-dev cmake swig python-pyglet python3-opengl libboost-all-dev libsdl2-dev libosmesa6-dev patchelf ffmpeg xvfb `\n",
    "\n",
    "Then:\n",
    "\n",
    "```\n",
    "git clone https://github.com/openai/gym.git \n",
    "\n",
    "cd gym\n",
    "\n",
    "pip install -e '.[all]'\n",
    "```\n",
    "\n",
    "PyBox2D:\n",
    "\n",
    "```\n",
    "git clone https://github.com/pybox2d/pybox2d\n",
    "cd pybox2d\n",
    "pip3 install -e .\n",
    "```\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Duckietown installation\n",
    "\n",
    "```\n",
    "git clone https://github.com/duckietown/gym-duckietown.git\n",
    "cd gym-duckietown\n",
    "pip3 install -e .\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Roboschool installation\n",
    "\n",
    "```\n",
    "git clone https://github.com/openai/roboschool\n",
    "cd roboschool\n",
    "ROBOSCHOOL_PATH=`pwd`\n",
    "git clone https://github.com/olegklimov/bullet3 -b roboschool_self_collision\n",
    "mkdir bullet3/build\n",
    "cd    bullet3/build\n",
    "cmake -DBUILD_SHARED_LIBS=ON -DUSE_DOUBLE_PRECISION=1 -DCMAKE_INSTALL_PREFIX:PATH=$ROBOSCHOOL_PATH/roboschool/cpp-household/bullet_local_install -DBUILD_CPU_DEMOS=OFF -DBUILD_BULLET2_DEMOS=OFF -DBUILD_EXTRAS=OFF  -DBUILD_UNIT_TESTS=OFF -DBUILD_CLSOCKET=OFF -DBUILD_ENET=OFF -DBUILD_OPENGL3_DEMOS=OFF ..\n",
    "\n",
    "make -j4\n",
    "make install\n",
    "cd ../..\n",
    "pip3 install -e $ROBOSCHOOL_PATH\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## RL cycle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "import gym\n",
    "\n",
    "# create the environment \n",
    "env = gym.make(\"CartPole-v1\")\n",
    "# reset the environment before starting\n",
    "env.reset()\n",
    "\n",
    "# loop 10 times\n",
    "for i in range(10):\n",
    "    # take a random action\n",
    "    env.step(env.action_space.sample())\n",
    "    # render the game\n",
    "    env.render()\n",
    "\n",
    "# close the environment\n",
    "env.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.\u001b[0m\n",
      "Episode 0 finished, reward:15\n",
      "Episode 1 finished, reward:13\n",
      "Episode 2 finished, reward:20\n",
      "Episode 3 finished, reward:22\n",
      "Episode 4 finished, reward:13\n",
      "Episode 5 finished, reward:18\n",
      "Episode 6 finished, reward:15\n",
      "Episode 7 finished, reward:12\n",
      "Episode 8 finished, reward:58\n",
      "Episode 9 finished, reward:15\n"
     ]
    }
   ],
   "source": [
    "import gym\n",
    "\n",
    "# create and initialize the environment\n",
    "env = gym.make(\"CartPole-v1\")\n",
    "env.reset()\n",
    "\n",
    "# play 10 games\n",
    "for i in range(10):\n",
    "    # initialize the variables\n",
    "    done = False\n",
    "    game_rew = 0\n",
    "\n",
    "    while not done:\n",
    "        # choose a random action\n",
    "        action = env.action_space.sample()\n",
    "        # take a step in the environment\n",
    "        new_obs, rew, done, info = env.step(action)\n",
    "        game_rew += rew\n",
    "    \n",
    "        # when is done, print the cumulative reward of the game and reset the environment\n",
    "        if done:\n",
    "            print('Episode %d finished, reward:%d' % (i, game_rew))\n",
    "            env.reset()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.\u001b[0m\n",
      "Box(4,)\n"
     ]
    }
   ],
   "source": [
    "import gym\n",
    "\n",
    "env = gym.make('CartPole-v1')\n",
    "print(env.observation_space)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Discrete(2)\n"
     ]
    }
   ],
   "source": [
    "print(env.action_space)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "0\n",
      "0\n"
     ]
    }
   ],
   "source": [
    "print(env.action_space.sample())\n",
    "print(env.action_space.sample())\n",
    "print(env.action_space.sample())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]\n"
     ]
    }
   ],
   "source": [
    "print(env.observation_space.low)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]\n"
     ]
    }
   ],
   "source": [
    "print(env.observation_space.high)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## TensorFlow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\users\\andrea\\appdata\\local\\programs\\python\\python35\\lib\\site-packages\\h5py\\__init__.py:34: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
      "  from ._conv import register_converters as _register_converters\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Tensor(\"add:0\", shape=(), dtype=int32)\n",
      "7\n"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "# create two constants: a and b\n",
    "a = tf.constant(4)\n",
    "b = tf.constant(3)\n",
    "\n",
    "# perform a computation\n",
    "c = a + b\n",
    "print(c) # print the shape of c\n",
    "\n",
    "# create a session\n",
    "session = tf.Session()\n",
    "# run the session. It compute the sum\n",
    "res = session.run(c)\n",
    "print(res) # print the actual result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# reset the graph\n",
    "tf.reset_default_graph()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Tensor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "()\n"
     ]
    }
   ],
   "source": [
    "a = tf.constant(1)\n",
    "print(a.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(5,)\n"
     ]
    }
   ],
   "source": [
    "# array of five elements\n",
    "b = tf.constant([1,2,3,4,5])\n",
    "print(b.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1 2 3]\n"
     ]
    }
   ],
   "source": [
    "#NB: a can be of any type of tensor\n",
    "a = tf.constant([1,2,3,4,5])\n",
    "first_three_elem = a[:3]\n",
    "fourth_elem = a[3]\n",
    "\n",
    "sess = tf.Session()\n",
    "print(sess.run(first_three_elem))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4\n"
     ]
    }
   ],
   "source": [
    "print(sess.run(fourth_elem))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Constant"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Tensor(\"a_const:0\", shape=(4,), dtype=float32)\n"
     ]
    }
   ],
   "source": [
    "a = tf.constant([1.0, 1.1, 2.1, 3.1], dtype=tf.float32, name='a_const')\n",
    "print(a)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Placeholder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[10.1 10.2 10.3]]\n"
     ]
    }
   ],
   "source": [
    "a = tf.placeholder(shape=(1,3), dtype=tf.float32)\n",
    "b = tf.constant([[10,10,10]], dtype=tf.float32)\n",
    "\n",
    "c = a + b\n",
    "\n",
    "sess = tf.Session()\n",
    "res = sess.run(c, feed_dict={a:[[0.1,0.2,0.3]]})\n",
    "print(res)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf.reset_default_graph()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Tensor(\"Placeholder:0\", shape=(?, 3), dtype=float32)\n",
      "[[10.1 10.2 10.3]]\n",
      "[[7. 7. 7.]\n",
      " [7. 7. 7.]]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "# NB: the fist dimension is 'None', meaning that it can be of any lenght\n",
    "a = tf.placeholder(shape=(None,3), dtype=tf.float32)\n",
    "b = tf.placeholder(shape=(None,3), dtype=tf.float32)\n",
    "\n",
    "c = a + b\n",
    "\n",
    "print(a)\n",
    "\n",
    "sess = tf.Session()\n",
    "print(sess.run(c, feed_dict={a:[[0.1,0.2,0.3]], b:[[10,10,10]]}))\n",
    "\n",
    "v_a = np.array([[1,2,3],[4,5,6]])\n",
    "v_b = np.array([[6,5,4],[3,2,1]])\n",
    "print(sess.run(c, feed_dict={a:v_a, b:v_b}))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[10.1 10.2 10.3]]\n"
     ]
    }
   ],
   "source": [
    "sess = tf.Session()\n",
    "print(sess.run(c, feed_dict={a:[[0.1,0.2,0.3]], b:[[10,10,10]]}))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Variable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0.4478302  0.7014905  0.36300516]]\n",
      "[[4 5]]\n"
     ]
    }
   ],
   "source": [
    "tf.reset_default_graph()\n",
    "\n",
    "# variable initialized using the glorot uniform initializer\n",
    "var = tf.get_variable(\"first_variable\", shape=[1,3], dtype=tf.float32, initializer=tf.glorot_uniform_initializer)\n",
    "\n",
    "# variable initialized with constant values\n",
    "init_val = np.array([4,5])\n",
    "var2 = tf.get_variable(\"second_variable\", shape=[1,2], dtype=tf.int32, initializer=tf.constant_initializer(init_val))\n",
    "\n",
    "# create the session\n",
    "sess = tf.Session()\n",
    "# initialize all the variables\n",
    "sess.run(tf.global_variables_initializer())\n",
    "\n",
    "print(sess.run(var))\n",
    "\n",
    "print(sess.run(var2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# not trainable variable\n",
    "var2 = tf.get_variable(\"variable\", shape=[1,2], trainable=False, dtype=tf.int32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[<tf.Variable 'first_variable:0' shape=(1, 3) dtype=float32_ref>, <tf.Variable 'second_variable:0' shape=(1, 2) dtype=int32_ref>, <tf.Variable 'variable:0' shape=(1, 2) dtype=int32_ref>]\n"
     ]
    }
   ],
   "source": [
    "print(tf.global_variables())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-0.015899599"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tf.reset_default_graph()\n",
    "\n",
    "const1 = tf.constant(3.0, name='constant1')\n",
    "\n",
    "var = tf.get_variable(\"variable1\", shape=[1,2], dtype=tf.float32)\n",
    "var2 = tf.get_variable(\"variable2\", shape=[1,2], trainable=False, dtype=tf.float32)\n",
    "\n",
    "op1 = const1 * var\n",
    "op2 = op1 + var2\n",
    "op3 = tf.reduce_mean(op2)\n",
    "\n",
    "sess = tf.Session()\n",
    "sess.run(tf.global_variables_initializer())\n",
    "sess.run(op3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Simple Linear Regression Example\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch:   0, MSE: 4617.4390, W: 1.295, b: -0.407\n",
      "Epoch:  40, MSE: 5.3334, W: 0.496, b: -0.727\n",
      "Epoch:  80, MSE: 4.5894, W: 0.529, b: -0.012\n",
      "Epoch: 120, MSE: 4.1029, W: 0.512, b: 0.608\n",
      "Epoch: 160, MSE: 3.8552, W: 0.506, b: 1.092\n",
      "Epoch: 200, MSE: 3.7597, W: 0.501, b: 1.418\n",
      "Final weight: 0.500, bias: 1.473\n"
     ]
    }
   ],
   "source": [
    "tf.reset_default_graph()\n",
    "\n",
    "np.random.seed(10)\n",
    "tf.set_random_seed(10)\n",
    "\n",
    "W, b = 0.5, 1.4\n",
    "# create a dataset of 100 examples\n",
    "X = np.linspace(0,100, num=100)\n",
    "# add random noise to the y labels\n",
    "y = np.random.normal(loc=W * X + b, scale=2.0, size=len(X))\n",
    "\n",
    "# create the placeholders\n",
    "x_ph = tf.placeholder(shape=[None,], dtype=tf.float32)\n",
    "y_ph = tf.placeholder(shape=[None,], dtype=tf.float32)\n",
    "\n",
    "# create the variables.\n",
    "v_weight = tf.get_variable(\"weight\", shape=[1], dtype=tf.float32)\n",
    "v_bias = tf.get_variable(\"bias\", shape=[1], dtype=tf.float32)\n",
    "\n",
    "# linear computation\n",
    "out = v_weight * x_ph + v_bias\n",
    "\n",
    "# compute the Mean Squared Error\n",
    "loss = tf.reduce_mean((out - y_ph)**2)\n",
    "\n",
    "# optimizer\n",
    "opt = tf.train.AdamOptimizer(0.4).minimize(loss)\n",
    "\n",
    "# create the session\n",
    "session = tf.Session()\n",
    "session.run(tf.global_variables_initializer())\n",
    "\n",
    "# loop to train the parameters\n",
    "for ep in range(210):\n",
    "    # run the optimizer and get the loss\n",
    "    train_loss, _ = session.run([loss, opt], feed_dict={x_ph:X, y_ph:y})\n",
    " \n",
    "    # print epoch number and loss\n",
    "    if ep % 40 == 0:\n",
    "        print('Epoch: %3d, MSE: %.4f, W: %.3f, b: %.3f' % (ep, train_loss, session.run(v_weight), session.run(v_bias)))\n",
    "        \n",
    "print('Final weight: %.3f, bias: %.3f' % (session.run(v_weight), session.run(v_bias)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### .. with TensorBoard"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch:   0, MSE: 4617.4390, W: 1.295, b: -0.407\n",
      "Epoch:  40, MSE: 5.3334, W: 0.496, b: -0.727\n",
      "Epoch:  80, MSE: 4.5894, W: 0.529, b: -0.012\n",
      "Epoch: 120, MSE: 4.1029, W: 0.512, b: 0.608\n",
      "Epoch: 160, MSE: 3.8552, W: 0.506, b: 1.092\n",
      "Epoch: 200, MSE: 3.7597, W: 0.501, b: 1.418\n",
      "Final weight: 0.500, bias: 1.473\n"
     ]
    }
   ],
   "source": [
    "from datetime import datetime\n",
    "\n",
    "tf.reset_default_graph()\n",
    "\n",
    "np.random.seed(10)\n",
    "tf.set_random_seed(10)\n",
    "\n",
    "W, b = 0.5, 1.4\n",
    "# create a dataset of 100 examples\n",
    "X = np.linspace(0,100, num=100)\n",
    "# add random noise to the y labels\n",
    "y = np.random.normal(loc=W * X + b, scale=2.0, size=len(X))\n",
    "\n",
    "# create the placeholders\n",
    "x_ph = tf.placeholder(shape=[None,], dtype=tf.float32)\n",
    "y_ph = tf.placeholder(shape=[None,], dtype=tf.float32)\n",
    "\n",
    "# create the variables.\n",
    "v_weight = tf.get_variable(\"weight\", shape=[1], dtype=tf.float32)\n",
    "v_bias = tf.get_variable(\"bias\", shape=[1], dtype=tf.float32)\n",
    "\n",
    "# linear computation\n",
    "out = v_weight * x_ph + v_bias\n",
    "\n",
    "# compute the Mean Squared Error\n",
    "loss = tf.reduce_mean((out - y_ph)**2)\n",
    "\n",
    "# optimizer\n",
    "opt = tf.train.AdamOptimizer(0.4).minimize(loss)\n",
    "\n",
    "\n",
    "tf.summary.scalar('MSEloss', loss)\n",
    "tf.summary.histogram('model_weight', v_weight)\n",
    "tf.summary.histogram('model_bias', v_bias)\n",
    "all_summary = tf.summary.merge_all()\n",
    "\n",
    "now = datetime.now()\n",
    "clock_time = \"{}_{}.{}.{}\".format(now.day, now.hour, now.minute, now.second)\n",
    "file_writer = tf.summary.FileWriter('log_dir/'+clock_time, tf.get_default_graph())\n",
    "\n",
    "\n",
    "# create the session\n",
    "session = tf.Session()\n",
    "session.run(tf.global_variables_initializer())\n",
    "\n",
    "# loop to train the parameters\n",
    "for ep in range(210):\n",
    "    # run the optimizer and get the loss\n",
    "    train_loss, _, train_summary = session.run([loss, opt, all_summary], feed_dict={x_ph:X, y_ph:y})\n",
    "    file_writer.add_summary(train_summary, ep)\n",
    " \n",
    "    # print epoch number and loss\n",
    "    if ep % 40 == 0:\n",
    "        print('Epoch: %3d, MSE: %.4f, W: %.3f, b: %.3f' % (ep, train_loss, session.run(v_weight), session.run(v_bias)))\n",
    "        \n",
    "print('Final weight: %.3f, bias: %.3f' % (session.run(v_weight), session.run(v_bias)))\n",
    "file_writer.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
